In this section, we read and process all the fire data for the California Campfire week.
# Combine data from different days
filelist = list.files(path="../data/fire/", pattern = "*.kml")
#assuming tab separated values with a header
datalist = lapply(filelist, function(x)st_read(paste("../data/fire/", x, sep="")))
#assuming the same header/columns for all files
fire <- do.call("rbind", datalist)
# Combine data from different days
filelist = list.files(path="../data/fire/", pattern = "*.txt")
#assuming tab separated values with a header
datalist = lapply(filelist, function(x)read.delim(paste("../data/fire/",x,sep=""), sep=",", strip.white=TRUE))
#assuming the same header/columns for all files
firetxt <- do.call("rbind", datalist)
firetxt <- firetxt %>%
rename(
longitude = Lon,
latitude = Lat,
date = YearDay,
time = Time,
satellite = Satellite,
method_of_detect = Method.of.Detect,
ecosys = Ecosys,
frp = Fire.RadPower
) %>%
mutate(
date = as.Date(as.character(date),
format = "%Y%j"),
method_of_detect = as.factor(method_of_detect),
satellite = as.factor(satellite),
ecosys = as.factor(ecosys),
time = substr(as.POSIXct(sprintf("%04.0f", time), format='%H%M'), 12, 16)
)
# Add a new column for date and time
firetxt <- firetxt %>%
mutate(
date_comp = as.POSIXct(paste(date, time),
format = "%Y-%m-%d %H:%M"),
frp = na_if(frp, -999.000)
)
We also use the California boundary file to restrict our view to only points in California.
cal_bound <- st_read("../ca-state-boundary/CA_State_TIGER2016.shp")
# Convert to the same coordinate system as HMS (4326)
cal_bound <- cal_bound %>%
st_set_crs(3857) %>%
st_transform(4326)
# Get an array of whether each observation is in California
in_bound <- lengths(st_intersects(fire, cal_bound))>0
# Get subset with indexing
# https://gis.stackexchange.com/questions/394954/r-using-st-intersects-to-classify-points-inside-outside-and-within-a-buffer
in_cali <- fire[in_bound,]
in_cali <- as.data.frame(in_cali) %>%
dplyr::select(-Name) %>%
# Remove prefix
mutate(Description = gsub('Fire Attributes: YearDay: ', '', Description)) %>%
# Replace column name strings with comma
mutate(Description = gsub('[a-zA-Z]*: ', ',', Description)) %>%
# Separate based on comma
separate(., col=Description,
into = c('date', 'time', 'satellite','method_of_detect'),
sep = ',')
in_cali <- in_cali %>%
# Remove unnecessary substrings
mutate(satellite = (str_remove_all(satellite,"Method of"))) %>%
# Convert into correct datetime
mutate( time = substr(as.POSIXct(sprintf("%04.0f", as.integer(time)), format='%H%M'), 12, 16),
start_time = as.POSIXct(paste(date, time),
format = "%Y%j %H:%M"),
date = as.POSIXct(date, format = "%Y%j")) %>%
dplyr::select(-time)
# Read light, medium, heavy separately
# Combine data from different days
filelist = list.files(path="../data/smoke/", pattern = "*.kml")
filelist <- filelist[-1]
#assuming tab separated values with a header
datalist = lapply(filelist, function(x)st_read(paste("../data/smoke/", x, sep=""), layer="Smoke (Light)"))
#assuming the same header/columns for all files
smoke_light <- do.call("rbind", datalist)
# smoke_light <- st_read("../data/smoke/smoke20181108.kml", layer="Smoke (Light)")
smoke_light <- as.data.frame(smoke_light) %>%
mutate(type="light")
#assuming tab separated values with a header
datalist = lapply(filelist, function(x)st_read(paste("../data/smoke/", x, sep=""), layer="Smoke (Medium)"))
smoke_med <- do.call("rbind", datalist)
# smoke_med <- st_read("../data/smoke/smoke20181108.kml", layer="Smoke (Medium)")
smoke_med <- as.data.frame(smoke_med) %>%
mutate(type="medium")
datalist = lapply(filelist, function(x)st_read(paste("../data/smoke/", x, sep=""), layer="Smoke (Heavy)"))
smoke_heavy <- do.call("rbind", datalist)
# smoke_heavy <- st_read("../data/smoke/smoke20181108.kml", layer="Smoke (Heavy)")
smoke_heavy <- as.data.frame(smoke_heavy) %>%
mutate(type="heavy")
smoke <- list(smoke_light, smoke_med, smoke_heavy)
smoke <- smoke %>%
reduce(full_join, by=c('Name', 'Description', 'type','geometry'))
smoke <- smoke %>%
dplyr::select(-Name) %>%
mutate(Description = gsub('Smoke Attributes: Start Time: ', '', Description)) %>%
mutate(Description = gsub('[a-zA-Z]*: ', ',', Description)) %>%
separate(., col=Description,
into = c('start_time', 'end_time', 'density', 'satellite'),
sep = ',')
# Switch off spherical geometry
sf_use_s2(FALSE)
# Clean time and add smoke area
smoke <- smoke %>%
# Remove unnecessary substrings
mutate(start_time = (str_remove_all(start_time,"[a-zA-Z]")),
end_time = (str_remove_all(end_time,"[a-zA-Z]"))) %>%
# Separate date and time based on space
separate(., col=start_time,
into=c('start_date', 'st'),
sep=' ') %>%
separate(., col=end_time,
into=c('end_date','et'),
sep=' ') %>%
# Convert into datetime object
mutate( st = substr(as.POSIXct(sprintf("%04.0f", as.integer(st)), format='%H%M'), 12, 16),
et = substr(as.POSIXct(sprintf("%04.0f", as.integer(et)), format='%H%M'), 12, 16),
start_time = as.POSIXct(paste(start_date, st),
format = "%Y%j %H:%M"),
end_time = as.POSIXct(paste(end_date, et),
format = "%Y%j %H:%M"),
area = st_area(smoke$geometry)) %>%
dplyr::select(-start_date, -end_date, -st, -et)
First, we plot all the fire detection points in California.
Then, we plot it making adjustments to the coordinate system.
Using these coordinates, we input into HDBSCAN specifying
minPts = 100.
We can plot the resulting clusters, excluding the outliers (belonging
to cluster = 0).
The points can also be shaded based on their membership probability.
The higher the probability, the more solid the color.